<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" data-whc_version="25.0">
    <head><link rel="shortcut icon" href="../../../oxygen-webhelp/template/images/favicon.png"/><link rel="icon" href="../../../oxygen-webhelp/template/images/favicon.png"/><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><meta name="copyright" content="(C) Copyright 2024"/><meta name="generator" content="DITA-OT"/><meta name="description" content="Cluster EMR batch and cluster batch mode pipelines can process data from Amazon S3. The requirements for cluster pipelines that read from Amazon S3 depend on the following batch modes: Cluster EMR ..."/><meta name="prodname" content="Data Collector"/><meta name="version" content="3"/><meta name="release" content="16"/><meta name="modification" content="0"/>        
      <title>Amazon S3 Requirements</title><!--  Generated with Oxygen version 25.1, build number 2023042410.  --><meta name="wh-path2root" content="../../../"/><meta name="wh-toc-id" content="concept_opj_jmf_f2b-d16893e65073"/><meta name="wh-source-relpath" content="datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.dita"/><meta name="wh-out-relpath" content="datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.html"/>

    <link rel="stylesheet" type="text/css" href="../../../oxygen-webhelp/app/commons.css?buildId=2023042410"/>
    <link rel="stylesheet" type="text/css" href="../../../oxygen-webhelp/app/topic.css?buildId=2023042410"/>

    <script src="../../../oxygen-webhelp/app/options/properties.js?buildId=20240802104629"></script>
    <script src="../../../oxygen-webhelp/app/localization/strings.js?buildId=2023042410"></script>
    <script src="../../../oxygen-webhelp/app/search/index/keywords.js?buildId=20240802104629"></script>
    <script defer="defer" src="../../../oxygen-webhelp/app/commons.js?buildId=2023042410"></script>
    <script defer="defer" src="../../../oxygen-webhelp/app/topic.js?buildId=2023042410"></script>
<link rel="stylesheet" type="text/css" href="../../../oxygen-webhelp/template/light.css?buildId=2023042410"/><link rel="stylesheet" type="text/css" href="../../../skin.css"/></head>

    <body class="wh_topic_page frmBody">
        
        
        

        
<nav class="navbar navbar-default wh_header" data-whc_version="25.0">
    <div class="container-fluid">
        <div class="wh_header_flex_container navbar-nav navbar-expand-md navbar-dark">
            <div class="wh_logo_and_publication_title_container">
                <div class="wh_logo_and_publication_title">
                    
                    <!--
                            This component will be generated when the next parameters are specified in the transformation scenario:
                            'webhelp.logo.image' and 'webhelp.logo.image.target.url'.
                            See: http://oxygenxml.com/doc/versions/17.1/ug-editor/#topics/dita_webhelp_output.html.
                    -->
                    
                    <div class=" wh_publication_title "><a href="../../../index.html"><span class="booktitle">  <span class="ph mainbooktitle"><span class="ph">Data Collector</span> User Guide</span>  </span></a></div>
                    
                </div>
                
                <!-- The menu button for mobile devices is copied in the output only when the 'webhelp.show.top.menu' parameter is set to 'yes' -->
                
            </div>

            <div class="wh_top_menu_and_indexterms_link collapse navbar-collapse">
                
                
                <div class=" wh_indexterms_link "><a href="../../../indexTerms.html" title="Index" aria-label="Go to index terms page"><span>Index</span></a></div>
                
            </div>
        </div>
    </div>
</nav>

        <div class=" wh_search_input navbar-form wh_topic_page_search search " role="form">


<form id="searchForm" method="get" role="search" action="../../../search.html"><div><input type="search" placeholder="Search " class="wh_search_textfield" id="textToSearch" name="searchQuery" aria-label="Search query" required="required"/><button type="submit" class="wh_search_button" aria-label="Search"><span class="search_input_text">Search</span></button></div></form>

</div>
        
        <div class="container-fluid">
            <div class="row">

                <nav class="wh_tools d-print-none">
                    
<div data-tooltip-position="bottom" class=" wh_breadcrumb "><ol class="d-print-none"><li><span class="home"><a href="../../../index.html"><span>Home</span></a></span></li><li><div class="topicref" data-id="concept_fpz_5r4_vs"><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines_title.html">Cluster Pipelines</a></div></div></li><li class="active"><div class="topicref" data-id="concept_opj_jmf_f2b"><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.html#concept_opj_jmf_f2b">Amazon S3 Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li></ol></div>



                    <div class="wh_right_tools "><button class="wh_hide_highlight" aria-label="Toggle search highlights" title="Toggle search highlights"></button><button class="webhelp_expand_collapse_sections" data-next-state="collapsed" aria-label="Collapse sections" title="Collapse sections"></button><div class=" wh_navigation_links "><span id="topic_navigation_links" class="navheader">
  
<span class="navprev"><a class="- topic/link link" href="../../../datacollector/UserGuide/Cluster_Mode/HDFSRequirements.html#task_akz_w5b_ws" title="HDFS Requirements" aria-label="Previous topic: HDFS Requirements" rel="prev"></a></span>  
<span class="navnext"><a class="- topic/link link" href="../../../datacollector/UserGuide/Cluster_Mode/StageLimitations.html#concept_pdf_r5y_fz" title="Cluster Pipeline Limitations" aria-label="Next topic: Cluster Pipeline Limitations" rel="next"></a></span>  </span></div>
<!--External resource link-->
<div class=" wh_print_link print d-none d-md-inline-block "><button onclick="window.print()" title="Print this page" aria-label="Print this page"></button></div>
                        
                        
                        
                        
                    </div>
                </nav>
            </div>

            

<div class="wh_content_area">
                <div class="row">
                    


                        <nav role="navigation" id="wh_publication_toc" class="col-lg-3 col-md-3 col-sm-12 d-md-block d-none d-print-none">
<div id="wh_publication_toc_content">


                            <div class=" wh_publication_toc " data-tooltip-position="right"><span class="expand-button-action-labels"><span id="button-expand-action" role="button" aria-label="Expand"></span><span id="button-collapse-action" role="button" aria-label="Collapse"></span><span id="button-pending-action" role="button" aria-label="Pending"></span></span><ul role="tree" aria-label="Table of Contents"><li role="treeitem" aria-expanded="false"><div data-tocid="concept_htw_ghg_jq-d16893e53" class="topicref" data-id="concept_htw_ghg_jq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_htw_ghg_jq-d16893e53-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Getting_Started/GettingStarted_Title.html#concept_htw_ghg_jq" id="concept_htw_ghg_jq-d16893e53-link">Getting Started</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_l2v_nlp_mpb-d16893e331" class="topicref" data-id="concept_l2v_nlp_mpb" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_l2v_nlp_mpb-d16893e331-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/ReleaseNotes/ReleaseNotes.html#concept_l2v_nlp_mpb" id="concept_l2v_nlp_mpb-d16893e331-link">Release Notes</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_l4q_flb_kr-d16893e2582" class="topicref" data-id="concept_l4q_flb_kr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_l4q_flb_kr-d16893e2582-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Installation/Install_title.html" id="concept_l4q_flb_kr-d16893e2582-link">Installation</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_ylh_yyz_ky-d16893e3984" class="topicref" data-id="concept_ylh_yyz_ky" 
data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_ylh_yyz_ky-d16893e3984-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Configuration/Config_title.html" id="concept_ylh_yyz_ky-d16893e3984-link">Configuration</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_ejk_f1f_5v-d16893e7058" class="topicref" data-id="concept_ejk_f1f_5v" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_ejk_f1f_5v-d16893e7058-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Upgrade/Upgrade_title.html" id="concept_ejk_f1f_5v-d16893e7058-link">Upgrade</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_qsw_cjy_bt-d16893e10103" class="topicref" data-id="concept_qsw_cjy_bt" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_qsw_cjy_bt-d16893e10103-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Design/PipelineDesign_title.html" id="concept_qsw_cjy_bt-d16893e10103-link">Pipeline Concepts and Design</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_qn1_wn4_kq-d16893e11199" class="topicref" data-id="concept_qn1_wn4_kq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_qn1_wn4_kq-d16893e11199-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Configuration/PipelineConfiguration_title.html" id="concept_qn1_wn4_kq-d16893e11199-link">Pipeline Configuration</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_hdr_gyw_41b-d16893e13057" class="topicref" data-id="concept_hdr_gyw_41b" data-state="not-ready"><span role="button" tabindex="0" 
aria-labelledby="button-expand-action concept_hdr_gyw_41b-d16893e13057-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Data_Formats/DataFormats-Title.html" id="concept_hdr_gyw_41b-d16893e13057-link">Data Formats</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_yjl_nc5_jq-d16893e14164" class="topicref" data-id="concept_yjl_nc5_jq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_yjl_nc5_jq-d16893e14164-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Origins/Origins_title.html" id="concept_yjl_nc5_jq-d16893e14164-link">Origins</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_yjl_nc5_jq-d16893e35197" class="topicref" data-id="concept_yjl_nc5_jq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_yjl_nc5_jq-d16893e35197-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Processors/Processors_title.html" id="concept_yjl_nc5_jq-d16893e35197-link">Processors</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_agj_cfj_br-d16893e44037" class="topicref" data-id="concept_agj_cfj_br" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_agj_cfj_br-d16893e44037-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Destinations/Destinations-title.html" id="concept_agj_cfj_br-d16893e44037-link">Destinations</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_umc_1lk_fx-d16893e56072" class="topicref" data-id="concept_umc_1lk_fx" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action 
concept_umc_1lk_fx-d16893e56072-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Executors/Executors-title.html" id="concept_umc_1lk_fx-d16893e56072-link">Executors</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_xxd_f5r_kx-d16893e59696" class="topicref" data-id="concept_xxd_f5r_kx" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_xxd_f5r_kx-d16893e59696-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Event_Handling/EventFramework-Title.html#concept_xxd_f5r_kx" id="concept_xxd_f5r_kx-d16893e59696-link">Dataflow Triggers</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_zq5_pb4_flb-d16893e60134" class="topicref" data-id="concept_zq5_pb4_flb" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_zq5_pb4_flb-d16893e60134-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Solutions/Solutions-title.html" id="concept_zq5_pb4_flb-d16893e60134-link">Solutions</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_ugp_kwf_xw-d16893e61337" class="topicref" data-id="concept_ugp_kwf_xw" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_ugp_kwf_xw-d16893e61337-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/DPM/DPM_title.html" id="concept_ugp_kwf_xw-d16893e61337-link">StreamSets Control Hub</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_fyf_gkq_4bb-d16893e62693" class="topicref" data-id="concept_fyf_gkq_4bb" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_fyf_gkq_4bb-d16893e62693-link" class="wh-expand-btn"></span><div class="title"><a 
href="../../../datacollector/UserGuide/Edge_Mode/EdgePipelines_title.html" id="concept_fyf_gkq_4bb-d16893e62693-link"><span class="ph">StreamSets Data Collector Edge</span></a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_wwq_gxc_py-d16893e63980" class="topicref" data-id="concept_wwq_gxc_py" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_wwq_gxc_py-d16893e63980-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Multithreaded_Pipelines/MultithreadedPipelines.html#concept_wwq_gxc_py" id="concept_wwq_gxc_py-d16893e63980-link">Multithreaded Pipelines</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_gzw_tdm_p2b-d16893e64187" class="topicref" data-id="concept_gzw_tdm_p2b" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_gzw_tdm_p2b-d16893e64187-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Microservice/Microservice_Title.html#concept_gzw_tdm_p2b" id="concept_gzw_tdm_p2b-d16893e64187-link">Microservice Pipelines</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="Orchestrators_Title-d16893e64348" class="topicref" data-id="Orchestrators_Title" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action Orchestrators_Title-d16893e64348-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Orchestration_Pipelines/OrchestrationPipelines_Title.html#Orchestrators_Title" id="Orchestrators_Title-d16893e64348-link">Orchestration Pipelines</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_wr1_ktz_bt-d16893e64489" class="topicref" data-id="concept_wr1_ktz_bt" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action 
concept_wr1_ktz_bt-d16893e64489-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/RPC_Pipelines/SDC_RPCpipelines_title.html#concept_wr1_ktz_bt" id="concept_wr1_ktz_bt-d16893e64489-link">SDC RPC Pipelines</a></div></div></li><li role="treeitem" aria-expanded="true"><div data-tocid="concept_fpz_5r4_vs-d16893e64679" class="topicref" data-id="concept_fpz_5r4_vs" data-state="expanded"><span role="button" tabindex="0" aria-labelledby="button-collapse-action concept_fpz_5r4_vs-d16893e64679-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines_title.html" id="concept_fpz_5r4_vs-d16893e64679-link">Cluster Pipelines</a></div></div><ul role="group" class="navbar-nav nav-list"><li role="treeitem" aria-expanded="false"><div data-tocid="concept_hmh_kfn_1s-d16893e64701" class="topicref" data-id="concept_hmh_kfn_1s" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_hmh_kfn_1s-d16893e64701-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/ClusterPipelines.html#concept_hmh_kfn_1s" id="concept_hmh_kfn_1s-d16893e64701-link">Cluster Pipelines (deprecated)</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="task_gmd_msw_yr-d16893e64935" class="topicref" data-id="task_gmd_msw_yr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action task_gmd_msw_yr-d16893e64935-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/KafkaRequirements.html#task_gmd_msw_yr" id="task_gmd_msw_yr-d16893e64935-link">Kafka Cluster Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_kry_gn5_lx-d16893e65003" 
class="topicref" data-id="concept_kry_gn5_lx" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_kry_gn5_lx-d16893e65003-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/MapRRequirements.html#concept_kry_gn5_lx" id="concept_kry_gn5_lx-d16893e65003-link">MapR Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem"><div data-tocid="task_akz_w5b_ws-d16893e65049" class="topicref" data-id="task_akz_w5b_ws" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/HDFSRequirements.html#task_akz_w5b_ws" id="task_akz_w5b_ws-d16893e65049-link">HDFS Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="true" class="active"><div data-tocid="concept_opj_jmf_f2b-d16893e65073" class="topicref" data-id="concept_opj_jmf_f2b" data-state="expanded"><span role="button" tabindex="0" aria-labelledby="button-collapse-action concept_opj_jmf_f2b-d16893e65073-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.html#concept_opj_jmf_f2b" id="concept_opj_jmf_f2b-d16893e65073-link">Amazon S3 Requirements</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div><ul role="group" class="navbar-nav nav-list"><li role="treeitem"><div data-tocid="task_o3s_kb5_g2b-d16893e65097" class="topicref" data-id="task_o3s_kb5_g2b" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.html#task_o3s_kb5_g2b" id="task_o3s_kb5_g2b-d16893e65097-link">Configuring Cluster EMR Batch Mode for Amazon S3</a><div class="wh-tooltip"><p class="shortdesc">Cluster EMR batch mode pipelines run on an Amazon EMR cluster to 
process data from         Amazon S3.</p></div></div></div></li><li role="treeitem"><div data-tocid="task_ejh_1d5_g2b-d16893e65122" class="topicref" data-id="task_ejh_1d5_g2b" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/AmazonS3Requirements.html#task_ejh_1d5_g2b" id="task_ejh_1d5_g2b-d16893e65122-link">Configuring Cluster Batch Mode for Amazon S3</a><div class="wh-tooltip"><p class="shortdesc">Cluster batch mode pipelines run on a Cloudera distribution of Hadoop (CDH) or         Hortonworks Data Platform (HDP) cluster to process data from Amazon S3.</p></div></div></div></li></ul></li><li role="treeitem"><div data-tocid="concept_pdf_r5y_fz-d16893e65147" class="topicref" data-id="concept_pdf_r5y_fz" data-state="leaf"><span role="button" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Cluster_Mode/StageLimitations.html#concept_pdf_r5y_fz" id="concept_pdf_r5y_fz-d16893e65147-link">Cluster Pipeline Limitations</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li></ul></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_jjk_23z_sq-d16893e65172" class="topicref" data-id="concept_jjk_23z_sq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_jjk_23z_sq-d16893e65172-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Data_Preview/DataPreview_Title.html#concept_jjk_23z_sq" id="concept_jjk_23z_sq-d16893e65172-link">Data Preview</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_pgk_brx_rr-d16893e65458" class="topicref" data-id="concept_pgk_brx_rr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_pgk_brx_rr-d16893e65458-link" class="wh-expand-btn"></span><div class="title"><a 
href="../../../datacollector/UserGuide/Alerts/RulesAlerts_title.html#concept_pgk_brx_rr" id="concept_pgk_brx_rr-d16893e65458-link">Rules and Alerts</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_asx_fdz_sq-d16893e65960" class="topicref" data-id="concept_asx_fdz_sq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_asx_fdz_sq-d16893e65960-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Monitoring/PipelineMonitoring_title.html#concept_asx_fdz_sq" id="concept_asx_fdz_sq-d16893e65960-link">Pipeline Monitoring</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_o3l_dtr_5q-d16893e66304" class="topicref" data-id="concept_o3l_dtr_5q" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_o3l_dtr_5q-d16893e66304-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Pipeline_Maintenance/PipelineMaintenance_title.html#concept_o3l_dtr_5q" id="concept_o3l_dtr_5q-d16893e66304-link">Pipeline Maintenance</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_yms_ftm_sq-d16893e66768" class="topicref" data-id="concept_yms_ftm_sq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_yms_ftm_sq-d16893e66768-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Administration/Administration_title.html#concept_yms_ftm_sq" id="concept_yms_ftm_sq-d16893e66768-link">Administration</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_nls_w1r_ks-d16893e67508" class="topicref" data-id="concept_nls_w1r_ks" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_nls_w1r_ks-d16893e67508-link" class="wh-expand-btn"></span><div 
class="title"><a href="../../../datacollector/UserGuide/Tutorial/Tutorial-title.html" id="concept_nls_w1r_ks-d16893e67508-link">Tutorial</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_sh3_frm_tq-d16893e68001" class="topicref" data-id="concept_sh3_frm_tq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_sh3_frm_tq-d16893e68001-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Troubleshooting/Troubleshooting_title.html#concept_sh3_frm_tq" id="concept_sh3_frm_tq-d16893e68001-link">Troubleshooting</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_xbx_rs1_tq-d16893e68798" class="topicref" data-id="concept_xbx_rs1_tq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_xbx_rs1_tq-d16893e68798-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Glossary/Glossary_title.html#concept_xbx_rs1_tq" id="concept_xbx_rs1_tq-d16893e68798-link">Glossary</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_jn1_nzb_kv-d16893e68843" class="topicref" data-id="concept_jn1_nzb_kv" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_jn1_nzb_kv-d16893e68843-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Apx-DataFormats/DataFormat_Title.html#concept_jn1_nzb_kv" id="concept_jn1_nzb_kv-d16893e68843-link">Data Formats by Stage</a><div class="wh-tooltip"><p class="shortdesc"></p></div></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_pvm_yt3_wq-d16893e68958" class="topicref" data-id="concept_pvm_yt3_wq" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_pvm_yt3_wq-d16893e68958-link" class="wh-expand-btn"></span><div 
class="title"><a href="../../../datacollector/UserGuide/Expression_Language/ExpressionLanguage_title.html" id="concept_pvm_yt3_wq-d16893e68958-link">Expression Language</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_vcj_1ws_js-d16893e69669" class="topicref" data-id="concept_vcj_1ws_js" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_vcj_1ws_js-d16893e69669-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Apx-RegEx/RegEx-Title.html#concept_vcj_1ws_js" id="concept_vcj_1ws_js-d16893e69669-link">Regular Expressions</a></div></div></li><li role="treeitem" aria-expanded="false"><div data-tocid="concept_chv_vmj_wr-d16893e69787" class="topicref" data-id="concept_chv_vmj_wr" data-state="not-ready"><span role="button" tabindex="0" aria-labelledby="button-expand-action concept_chv_vmj_wr-d16893e69787-link" class="wh-expand-btn"></span><div class="title"><a href="../../../datacollector/UserGuide/Apx-GrokPatterns/GrokPatterns_title.html#concept_chv_vmj_wr" id="concept_chv_vmj_wr-d16893e69787-link">Grok Patterns</a></div></div></li></ul></div>
                        

</div>
</nav>
                    


                    
                    <div id="wh_topic_body" class="col-lg-7 col-md-9 col-sm-12">
<button id="wh_close_publication_toc_button" class="close-toc-button d-none" aria-label="Toggle publication table of contents" aria-controls="wh_publication_toc" aria-expanded="true"><span class="close-toc-icon-container"><span class="close-toc-icon"></span></span></button><button id="wh_close_topic_toc_button" class="close-toc-button d-none" aria-label="Toggle topic table of contents" aria-controls="wh_topic_toc" aria-expanded="true"><span class="close-toc-icon-container"><span class="close-toc-icon"></span></span></button>

                        
<div class=" wh_topic_content body "><main role="main"><article class="" role="article" aria-labelledby="ariaid-title1"><article class="nested0" aria-labelledby="ariaid-title1" id="concept_opj_jmf_f2b">
    <h1 class="- topic/title title topictitle1" id="ariaid-title1">Amazon S3 Requirements</h1>
    
    <div class="- topic/body concept/conbody body conbody"><p class="- topic/shortdesc shortdesc"></p>
        <p class="- topic/p p">Cluster EMR batch and cluster batch mode pipelines can process data from Amazon S3.</p>
        <p class="- topic/p p">The requirements for cluster pipelines that read from Amazon S3 depend on the following
            batch modes:</p>
        <dl class="- topic/dl dl">
            
                <dt class="- topic/dt dt dlterm">Cluster EMR batch mode</dt>
                <dd class="- topic/dd dd"> Cluster EMR batch mode pipelines use a Hadoop FS origin and run on an Amazon
                    EMR cluster to process data from Amazon S3. Cluster EMR batch mode pipelines
                    require a supported version of an Amazon EMR cluster with Hadoop. For a list of
                    the supported Amazon EMR and Hadoop versions, see <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Installation/AddtionalStageLibs.html#concept_evs_xkm_s5">Available Stage Libraries</a></span>. </dd>
            
            
                <dt class="- topic/dt dt dlterm">Cluster batch mode</dt>
                <dd class="- topic/dd dd">Cluster batch mode pipelines use a Hadoop FS origin and run on a Cloudera
                    distribution of Hadoop (CDH) or Hortonworks Data Platform (HDP) cluster to
                    process data from Amazon S3. Cluster batch mode pipelines that read from Amazon S3 require
                    a supported version of CDH or HDP. For a list of the supported CDH or HDP
                    versions, see <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Installation/AddtionalStageLibs.html#concept_evs_xkm_s5">Available Stage Libraries</a></span>.</dd>
            
        </dl>
    </div>
<article class="- topic/topic task/task topic task nested1" aria-labelledby="ariaid-title2" id="task_o3s_kb5_g2b">
    <h2 class="- topic/title title topictitle2" id="ariaid-title2">Configuring Cluster EMR Batch Mode for Amazon S3</h2>
    
    <div class="- topic/body task/taskbody body taskbody"><p class="- topic/shortdesc shortdesc">Cluster EMR batch mode pipelines run on an Amazon EMR cluster to process data from
        Amazon S3.</p>
        <section class="- topic/section task/context section context">
                    <p class="- topic/p p">Cluster EMR batch mode pipelines can run on an existing Amazon EMR cluster or
                on a new EMR cluster that is provisioned when the pipeline starts. When you
                provision a new EMR cluster, you can configure whether the cluster remains active or
                terminates when the pipeline stops.</p>
                    <p class="- topic/p p">
                <span class="- topic/ph ph">Data Collector</span>
                can be installed on a gateway node in an existing Amazon EMR cluster. Or, it can be
                installed outside of the EMR cluster — on an on-premises machine or on another
                Amazon EC2 instance. Regardless of where <span class="- topic/ph ph">Data Collector</span>
                is installed, you'll likely need to modify the Amazon EMR security group to allow
                    <span class="- topic/ph ph">Data Collector</span>
                to access the master node in the EMR cluster. Security groups control inbound and
                outbound access to EMR cluster instances. For information on configuring security
                groups for Amazon EMR clusters, see the <a class="- topic/xref xref" href="https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-security-groups.html" target="_blank" rel="external noopener">Amazon EMR documentation</a>. </p>
                    <p class="- topic/p p"> All processors and destinations supported in cluster pipelines are
                        supported in a cluster EMR batch pipeline as long as network connectivity is
                        correctly configured from the Amazon EMR cluster to any external system that
                        the processors or destinations use. For example, if you include a JDBC
                        Lookup processor in a cluster EMR batch pipeline, you must ensure that the
                        Amazon EMR cluster can connect to the database. </p>
                
            <div class="- topic/note note note note_note"><span class="note__title">Note:</span> Cluster EMR batch mode pipelines do not support Kerberos authentication at this
                time.</div>
            <p class="- topic/p p">Complete the following steps to configure a cluster EMR batch mode pipeline to read
                from Amazon S3: </p>
        </section>
        <section class="- topic/ol task/steps ol steps"><ol class="- topic/ol task/steps ol steps"><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">In Amazon EMR, modify the master security group used by the EMR cluster to
                    allow <span class="- topic/ph ph">Data Collector</span> to access the master node in the cluster. </span>
                <div class="- topic/itemgroup task/info itemgroup info">For information on configuring security groups for EMR clusters, see the <a class="- topic/xref xref" href="https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-security-groups.html" target="_blank" rel="external noopener">Amazon EMR documentation</a>. </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">In the pipeline properties, on the <span class="+ topic/keyword ui-d/wintitle keyword wintitle">General</span> tab, set the
                        <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Execution Mode</span> property to <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Cluster EMR
                        Batch</span>.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Cluster</span> tab of the pipeline, configure the
                    following properties:</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <div class="table-container"><table class="- topic/table table frame-all" id="task_o3s_kb5_g2b__table_mkj_kdr_wr" data-ofbid="task_o3s_kb5_g2b__table_mkj_kdr_wr" data-cols="2"><caption></caption><colgroup><col style="width:22.22222222222222%"/><col style="width:77.77777777777779%"/></colgroup><thead class="- topic/thead thead">
                                <tr class="- topic/row">
                                    <th class="- topic/entry entry colsep-1 rowsep-1" id="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__1">Cluster Property</th>
                                    <th class="- topic/entry entry colsep-0 rowsep-1" id="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__2">Description</th>
                                </tr>
                            </thead><tbody class="- topic/tbody tbody">
                                <tr class="- topic/row">
                            <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__1">Worker Java Options</td>
                            <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__2">Additional Java properties for the pipeline. Separate properties
                                with a space.<p class="- topic/p p">The following properties are set by default. </p><div class="- topic/p p">
                                    <ul class="- topic/ul ul" id="task_o3s_kb5_g2b__ul_hf3_xqj_ws" data-ofbid="task_o3s_kb5_g2b__ul_hf3_xqj_ws">
                                        <li class="- topic/li li">-XX:+UseConcMarkSweepGC and -XX:+UseParNewGC enable
                                            the Concurrent Mark Sweep (CMS) garbage collector.</li>
                                        <li class="- topic/li li">-Dlog4j.debug enables debug logging for log4j.</li>
                                    </ul>
                                </div><p class="- topic/p p">Changing the default properties is not recommended.</p><p class="- topic/p p">You
                                    can add any valid Java property. </p></td>
                        </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__1">Log Level</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__2">Log level to use when the pipeline runs on the Amazon EMR
                                        cluster. Default is the INFO severity level.</td>
                                </tr>
                                <tr class="- topic/row">
                            <td class="- topic/entry entry colsep-1 rowsep-0" headers="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__1">Worker Memory (MB)</td>
                            <td class="- topic/entry entry colsep-0 rowsep-0" headers="task_o3s_kb5_g2b__table_mkj_kdr_wr__entry__2">Maximum amount of memory allocated to each <span class="- topic/ph ph">Data Collector</span> worker in the cluster.<p class="- topic/p p">Default is 1024 MB.</p></td>
                        </tr>
                            </tbody></table></div>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">EMR</span> tab of the pipeline, configure the following
                    properties:</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <div class="table-container"><table class="- topic/table table frame-all" id="task_o3s_kb5_g2b__table_zhq_lzv_g2b" data-ofbid="task_o3s_kb5_g2b__table_zhq_lzv_g2b" data-cols="2"><caption></caption><colgroup><col style="width:22.22222222222222%"/><col style="width:77.77777777777779%"/></colgroup><thead class="- topic/thead thead">
                                <tr class="- topic/row">
                                    <th class="- topic/entry entry colsep-1 rowsep-1" id="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">EMR Property</th>
                                    <th class="- topic/entry entry colsep-0 rowsep-1" id="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">Description</th>
                                </tr>
                            </thead><tbody class="- topic/tbody tbody">
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">Region</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">AWS region that contains the EMR cluster.<p class="- topic/p p">If the region
                                            does not display in the list, select
                                                <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Custom</span> and then enter the
                                            name of the AWS region.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">AWS Access Key</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">AWS access key ID.</td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">AWS Secret Key</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">AWS secret access key.<p class="- topic/p p">The pipeline uses the access key
                                            pair to pass credentials to Amazon Web Services to
                                            connect to the EMR cluster.</p><div class="- topic/note note tip note_tip"><span class="note__title">Tip:</span> To secure sensitive information such as
                  access key pairs, you can use <a class="- topic/xref xref" href="../Pipeline_Configuration/RuntimeValues.html#concept_bs4_5nm_2s" title="Similar to runtime properties, runtime resources are values that you define in a file local to the Data Collector and call from within a pipeline. But with runtime resources, you can restrict the permissions for the files to secure information.">runtime resources</a> or <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Configuration/CredentialStores.html#concept_bt1_bpj_r1b">credential stores</a></span>.</div></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">S3 Staging URI</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">Temporary staging location in Amazon S3 to store the
                                        resources and configuration files required to run the
                                        pipeline. <span class="- topic/ph ph">Data Collector</span> removes the contents from the folder when the pipeline
                                            stops.<p class="- topic/p p">Location must be unique for each pipeline. Use
                                            the following format:
                                            </p><pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>s3://&lt;bucket&gt;/&lt;path&gt;</code></pre><p class="- topic/p p">The
                                            bucket must exist. If the folder in the specified path
                                            does not exist, it is created.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">Provision a New Cluster</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">Provisions a new EMR cluster when the pipeline
                                        starts.</td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-0" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__1">Cluster ID</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-0" headers="task_o3s_kb5_g2b__table_zhq_lzv_g2b__entry__2">ID of the existing EMR cluster.</td>
                                </tr>
                            </tbody></table></div>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">If you chose to provision a new EMR cluster, configure the following properties
                    on the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">EMR</span> tab of the pipeline.</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <p class="- topic/p p">For more information about the properties required to provision an EMR
                        cluster, see the <a class="- topic/xref xref" href="https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-gs.html" target="_blank" rel="external noopener">Amazon EMR documentation</a>.</p>
                    <div class="table-container"><table class="- topic/table table frame-all" id="task_o3s_kb5_g2b__table_ycg_f1w_g2b" data-ofbid="task_o3s_kb5_g2b__table_ycg_f1w_g2b" data-cols="2"><caption></caption><colgroup><col style="width:22.22222222222222%"/><col style="width:77.77777777777779%"/></colgroup><thead class="- topic/thead thead">
                                <tr class="- topic/row">
                                    <th class="- topic/entry entry colsep-1 rowsep-1" id="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">EMR Property to Provision New Cluster</th>
                                    <th class="- topic/entry entry colsep-0 rowsep-1" id="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Description</th>
                                </tr>
                            </thead><tbody class="- topic/tbody tbody">
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Cluster Name Prefix</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Prefix for the name of the provisioned EMR cluster.
                                            <div class="- topic/p p">The Data Collector ID and pipeline ID are appended to
                                            the prefix as
                                            follows:<pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>&lt;prefix&gt;::&lt;sdc ID&gt;::&lt;pipeline ID&gt;</code></pre></div></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Terminate Cluster</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Terminates the cluster when the pipeline stops.<p class="- topic/p p">When
                                            cleared, the cluster remains active when the pipeline
                                            stops.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Logging Enabled</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Enables logging on the cluster.<p class="- topic/p p">When logging is
                                            enabled, Amazon EMR writes the cluster log files to the
                                            Amazon S3 location that you specify.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">S3 Log URI</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Location in Amazon S3 where the cluster writes log data.
                                            <p class="- topic/p p">Location must be unique for each pipeline. Use the
                                            following format:
                                            </p><pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>s3://&lt;bucket&gt;/&lt;path&gt;</code></pre><p class="- topic/p p">The
                                            bucket must exist. If the folder in the specified path
                                            does not exist, it is created.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Enable Debugging</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Enables debugging on the cluster. <p class="- topic/p p">When debugging is
                                            enabled, you can use the Amazon EMR console to view the
                                            cluster log files.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Service Role</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">EMR role used by the cluster when provisioning resources
                                        and performing other service-level tasks.<p class="- topic/p p">Default is
                                            EMR_DefaultRole. For more information about configuring
                                            roles for Amazon EMR, see the <a class="- topic/xref xref" href="https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html" target="_blank" rel="external noopener">Amazon EMR
                                                documentation</a>.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Job Flow Role</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">EMR role for EC2 used by EC2 instances within the
                                            cluster.<p class="- topic/p p">Default is EMR_EC2_DefaultRole. For more
                                            information about configuring roles for Amazon EMR, see
                                            the <a class="- topic/xref xref" href="https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-iam-roles.html" target="_blank" rel="external noopener">Amazon EMR
                                                documentation</a>.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Visible to All Users</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Determines whether all AWS Identity and Access Management
                                        (IAM) users under your account can access the
                                        cluster.</td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">EC2 Subnet ID</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">EC2 subnet identifier to launch the cluster in.</td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Master Security Group</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Security group ID for the master node in the
                                            cluster.<div class="- topic/note note important note_important"><span class="note__title">Important:</span> Verify that the master
                                            security group allows <span class="- topic/ph ph">Data Collector</span> to access the master node in the EMR cluster. For
                                            information on configuring security groups for EMR
                                            clusters, see the <a class="- topic/xref xref" href="https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-security-groups.html" target="_blank" rel="external noopener">Amazon EMR
                                                documentation</a>.</div></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Slave Security Group</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Security group ID for the slave nodes in the
                                        cluster.</td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Instance Count</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Number of Amazon EC2 instances to initialize. Each
                                        instance corresponds to a slave node in the EMR
                                        cluster.</td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Master Instance Type</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Amazon EC2 instance type initialized for the master node
                                        in the EMR cluster.<p class="- topic/p p">If an instance type does not display
                                            in the list, select <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Custom</span> and
                                            then enter the instance type.</p></td>
                                </tr>
                                <tr class="- topic/row">
                                    <td class="- topic/entry entry colsep-1 rowsep-0" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__1">Slave Instance Type</td>
                                    <td class="- topic/entry entry colsep-0 rowsep-0" headers="task_o3s_kb5_g2b__table_ycg_f1w_g2b__entry__2">Amazon EC2 instance type initialized for the slave nodes
                                        in the EMR cluster.<p class="- topic/p p">If an instance type does not display
                                            in the list, select <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Custom</span> and
                                            then enter the instance type.</p></td>
                                </tr>
                            </tbody></table></div>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">In the pipeline, use the Hadoop FS origin for cluster EMR batch mode.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/keyword ui-d/wintitle keyword wintitle">General</span> tab of the origin, select the appropriate
                    EMR stage library for cluster EMR batch mode.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Hadoop FS</span> tab of the origin, configure the Hadoop
                    FS URI property to point to the Amazon S3 bucket to read from. </span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <p class="- topic/p p">Use the following format: <code class="+ topic/ph pr-d/codeph ph codeph">s3a://&lt;bucket&gt;</code></p><p class="- topic/p p">For example: <code class="+ topic/ph pr-d/codeph ph codeph">s3a://WebServer</code></p><div class="- topic/p p">Then, in the Input Paths property, enter the full path to the data to be read within that
            Amazon S3 bucket. You can enter multiple paths for the Input Paths property, for
                example:<ul class="- topic/ul ul" id="task_o3s_kb5_g2b__ul_fbn_rgz_h2b" data-ofbid="task_o3s_kb5_g2b__ul_fbn_rgz_h2b">
                <li class="- topic/li li">Input Path 1 - <code class="+ topic/ph pr-d/codeph ph codeph">/2016/February</code></li>
                <li class="- topic/li li">Input Path 2 - <code class="+ topic/ph pr-d/codeph ph codeph">/2016/March</code></li>
            </ul></div>
                    <p class="- topic/p p">For more information, see <a class="- topic/xref xref" href="../Origins/HadoopFS-origin.html#concept_ud1_wd2_h2b" title="The Hadoop FS origin included in a cluster batch or cluster EMR batch pipeline allows you to read from Amazon S3.">Reading from Amazon S3</a>.</p>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">S3</span> tab of the origin, enter the same access key
                    pair that you entered on the EMR tab of the pipeline.</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <p class="- topic/p p">The origin uses the access key pair to pass credentials to Amazon Web
                        Services to read from Amazon S3.</p>
                    <div class="- topic/note note tip note_tip"><span class="note__title">Tip:</span> To secure sensitive information such as
                  access key pairs, you can use <a class="- topic/xref xref" href="../Pipeline_Configuration/RuntimeValues.html#concept_bs4_5nm_2s" title="Similar to runtime properties, runtime resources are values that you define in a file local to the Data Collector and call from within a pipeline. But with runtime resources, you can restrict the permissions for the files to secure information.">runtime resources</a> or <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Configuration/CredentialStores.html#concept_bt1_bpj_r1b">credential stores</a></span>.</div>
                </div>
            </li></ol></section>
    </div>
</article><article class="- topic/topic task/task topic task nested1" aria-labelledby="ariaid-title3" id="task_ejh_1d5_g2b">
    <h2 class="- topic/title title topictitle2" id="ariaid-title3">Configuring Cluster Batch Mode for Amazon S3</h2>
    
    <div class="- topic/body task/taskbody body taskbody"><p class="- topic/shortdesc shortdesc">Cluster batch mode pipelines run on a Cloudera distribution of Hadoop (CDH) or
        Hortonworks Data Platform (HDP) cluster to process data from Amazon S3.</p>
        <section class="- topic/section task/context section context">
            <p class="- topic/p p">Complete the following steps to configure a cluster batch mode pipeline to read from
                Amazon S3: </p>
        </section>
        <section class="- topic/ol task/steps ol steps"><ol class="- topic/ol task/steps ol steps"><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">Verify the installation of HDFS and YARN.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">Install <span class="- topic/ph ph">Data Collector</span> on a YARN gateway node.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">Grant the user defined in the user environment variable write permission on
                        <span class="+ topic/ph sw-d/filepath ph filepath">/user/$SDC_USER</span>.</span>
                <div class="- topic/itemgroup task/info itemgroup info">The user environment variable defines the system
                    user used to run Data Collector as a service. The file that defines the user
                    environment variable depends on your operating system. For more information, <span class="- topic/ph ph">see <a class="- topic/xref xref" href="../Configuration/DCEnvironmentConfig.html#concept_htz_t1s_3v" title="When you run Data Collector as a service, Data Collector runs as the system user account and group defined in environment variables. The default system user and group are named sdc.">User and Group for Service Start</a></span>. </div>
                <div class="- topic/itemgroup task/info itemgroup info">For example, say the user environment
                    variable is defined as <span class="+ topic/ph sw-d/filepath ph filepath">sdc</span> and the cluster does not use
                    Kerberos. Then you might use the following commands to create the directory and
                    configure the necessary write
                    permissions:<pre class="+ topic/pre pr-d/codeblock pre codeblock" id="task_ejh_1d5_g2b__Cluster-Code-SDCUserCode" data-ofbid="task_ejh_1d5_g2b__Cluster-Code-SDCUserCode"><code>$ sudo -u hdfs hadoop fs -mkdir /user/sdc
$ sudo -u hdfs hadoop fs -chown sdc /user/sdc</code></pre></div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">To enable <span class="- topic/ph ph">Data Collector</span> to submit YARN jobs, perform one of the following tasks:</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <ul class="- topic/ul ul" id="task_ejh_1d5_g2b__ul_ult_d5p_qz" data-ofbid="task_ejh_1d5_g2b__ul_ult_d5p_qz">
                        <li class="- topic/li li">On YARN, set the min.user.id property to a value equal to or lower than the
                            user ID of the <span class="- topic/ph ph">Data Collector</span> user,
                            typically named "sdc".</li>
                        <li class="- topic/li li">On YARN, add the <span class="- topic/ph ph">Data Collector</span> user name,
                            typically "sdc", to the allowed.system.users property.</li>
                    </ul>
                    <ul class="- topic/ul ul" id="task_ejh_1d5_g2b__ul_qf3_r1j_cy" data-ofbid="task_ejh_1d5_g2b__ul_qf3_r1j_cy">
                        <li class="- topic/li li">After you create the pipeline, specify a Hadoop FS user in the Hadoop FS
                            origin. <p class="- topic/p p">For the Hadoop FS User property, enter a user with an ID that
                                is equal to or higher than the value of the min.user.id property, or with a
                                user name that is listed in the allowed.system.users property. </p></li>
                    </ul>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On YARN, verify that the Hadoop logging level is set to a severity of INFO or
                    lower. </span>
                <div class="- topic/itemgroup task/info itemgroup info">YARN sets the Hadoop logging level to INFO by default. To change the logging
                        level:<ol class="- topic/ol ol" type="a" id="task_ejh_1d5_g2b__ol_f33_ghv_gy" data-ofbid="task_ejh_1d5_g2b__ol_f33_ghv_gy">
                        <li class="- topic/li li">Edit the log4j.properties file. <div class="- topic/p p">By default, the file is located in
                                the following directory:
                            <pre class="+ topic/pre pr-d/codeblock pre codeblock"><code>/etc/hadoop/conf</code></pre></div></li>
                        <li class="- topic/li li">Set the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">log4j.rootLogger</span> property to a severity
                            of INFO or lower, such as DEBUG or TRACE.</li>
                    </ol></div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">If YARN is configured to use Kerberos authentication, configure <span class="- topic/ph ph">Data Collector</span> to use Kerberos
                    authentication. </span>
                <div class="- topic/itemgroup task/info itemgroup info">When you configure Kerberos authentication for <span class="- topic/ph ph">Data Collector</span>, you enable <span class="- topic/ph ph">Data Collector</span> to use Kerberos
                    and define the principal and keytab. <div class="- topic/note note important note_important" id="task_ejh_1d5_g2b__Cluster-KerbNote" data-ofbid="task_ejh_1d5_g2b__Cluster-KerbNote"><span class="note__title">Important:</span> For cluster pipelines, enter an absolute path to the
                        keytab when configuring <span class="- topic/ph ph">Data Collector</span>. Standalone
                        pipelines do not require an absolute path.</div></div>
                <div class="- topic/itemgroup task/info itemgroup info">Once enabled, <span class="- topic/ph ph">Data Collector</span>
                    automatically uses the Kerberos principal and keytab to connect to any YARN
                    cluster that uses Kerberos. <span class="- topic/ph ph">For more information about enabling Kerberos authentication
                        for <span class="- topic/ph ph">Data Collector</span>, see <a class="- topic/xref xref" href="../Configuration/DCConfig.html#concept_hnm_n4l_xs" title="You can use Kerberos authentication to connect to external systems as well as YARN clusters.">Kerberos Authentication</a>.</span></div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">In the pipeline properties, on the <span class="+ topic/keyword ui-d/wintitle keyword wintitle">General</span> tab, set the
                        <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Execution Mode</span> property to <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Cluster
                        Batch</span>.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Cluster</span> tab, configure the following
                    properties:</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <div class="table-container"><table class="- topic/table table frame-all" id="task_ejh_1d5_g2b__table_mkj_kdr_wr" data-ofbid="task_ejh_1d5_g2b__table_mkj_kdr_wr" data-cols="2"><caption></caption><colgroup><col style="width:22.22222222222222%"/><col style="width:77.77777777777779%"/></colgroup><thead class="- topic/thead thead">
                                <tr class="- topic/row">
                                    <th class="- topic/entry entry colsep-1 rowsep-1" id="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__1">Cluster Property</th>
                                    <th class="- topic/entry entry colsep-0 rowsep-1" id="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__2">Description</th>
                                </tr>
                            </thead><tbody class="- topic/tbody tbody">
                                <tr class="- topic/row">
                            <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__1">Worker Java Options</td>
                            <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__2">Additional Java properties for the pipeline. Separate properties
                                with a space.<p class="- topic/p p">The following properties are set by default. </p><div class="- topic/p p">
                                    <ul class="- topic/ul ul" id="task_ejh_1d5_g2b__ul_hf3_xqj_ws" data-ofbid="task_ejh_1d5_g2b__ul_hf3_xqj_ws">
                                        <li class="- topic/li li">-XX:+UseConcMarkSweepGC and -XX:+UseParNewGC enable
                                            the Concurrent Mark Sweep (CMS) garbage collector.</li>
                                        <li class="- topic/li li">-Dlog4j.debug enables debug logging for log4j.</li>
                                    </ul>
                                </div><p class="- topic/p p">Changing the default properties is not recommended.</p><p class="- topic/p p">You
                                    can add any valid Java property. </p></td>
                        </tr>
                                <tr class="- topic/row">
                            <td class="- topic/entry entry colsep-1 rowsep-1" headers="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__1">Launcher Env Configuration</td>
                            <td class="- topic/entry entry colsep-0 rowsep-1" headers="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__2">
                                <p class="- topic/p p">Additional configuration properties for the cluster launcher.
                                    Using <a class="- topic/xref xref" href="../Pipeline_Configuration/SimpleBulkEdit.html#concept_alb_b3y_cbb">simple or bulk edit mode</a>, click the
                                        <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Add</span> icon and define the property name
                                    and value. </p>
                            </td>
                        </tr>
                                <tr class="- topic/row">
                            <td class="- topic/entry entry colsep-1 rowsep-0" headers="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__1">Worker Memory (MB)</td>
                            <td class="- topic/entry entry colsep-0 rowsep-0" headers="task_ejh_1d5_g2b__table_mkj_kdr_wr__entry__2">Maximum amount of memory allocated to each <span class="- topic/ph ph">Data Collector</span> worker in the cluster.<p class="- topic/p p">Default is 1024 MB.</p></td>
                        </tr>
                            </tbody></table></div>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">In the pipeline, use the Hadoop FS origin for cluster batch mode.</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <p class="- topic/p p">For more information about the origin, see <a class="- topic/xref xref" href="../Origins/HadoopFS-origin.html#concept_lw2_tnm_vs">Hadoop FS (deprecated)</a>.</p>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/keyword ui-d/wintitle keyword wintitle">General</span> tab of the origin, select the appropriate
                    CDH or HDP stage library for cluster mode.</span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Hadoop FS</span> tab of the origin, configure the Hadoop
                    FS URI property to point to the Amazon S3 bucket to read from. </span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <p class="- topic/p p">Use the following format: <code class="+ topic/ph pr-d/codeph ph codeph">s3a://&lt;bucket&gt;</code></p><p class="- topic/p p">For example: <code class="+ topic/ph pr-d/codeph ph codeph">s3a://WebServer</code></p><div class="- topic/p p">Then in the Input Paths property, enter the full path to the data to be read within that
            Amazon S3 bucket. You can enter multiple paths for the Input Paths property, for
                example:<ul class="- topic/ul ul" id="task_ejh_1d5_g2b__ul_fbn_rgz_h2b" data-ofbid="task_ejh_1d5_g2b__ul_fbn_rgz_h2b">
                <li class="- topic/li li">Input Path 1 - <code class="+ topic/ph pr-d/codeph ph codeph">/2016/February</code></li>
                <li class="- topic/li li">Input Path 2 - <code class="+ topic/ph pr-d/codeph ph codeph">/2016/March</code></li>
            </ul></div>
                    <p class="- topic/p p">For more information, see <a class="- topic/xref xref" href="../Origins/HadoopFS-origin.html#concept_ud1_wd2_h2b" title="The Hadoop FS origin included in a cluster batch or cluster EMR batch pipeline allows you to read from Amazon S3.">Reading from Amazon S3</a>.</p>
                </div>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Hadoop FS</span> tab of the origin, enable the
                        <span class="+ topic/ph ui-d/uicontrol ph uicontrol">Kerberos Authentication</span> property if YARN is
                    configured to use Kerberos authentication. </span>
            </li><li class="- topic/li task/step li step">
                <span class="- topic/ph task/cmd ph cmd">On the <span class="+ topic/ph ui-d/uicontrol ph uicontrol">S3</span> tab of the origin, enter the AWS access key
                    pair used to pass credentials to Amazon Web Services to read from Amazon
                    S3.</span>
                <div class="- topic/itemgroup task/info itemgroup info">
                    <div class="- topic/note note tip note_tip"><span class="note__title">Tip:</span> To secure sensitive information such as
                  access key pairs, you can use <a class="- topic/xref xref" href="../Pipeline_Configuration/RuntimeValues.html#concept_bs4_5nm_2s" title="Similar to runtime properties, runtime resources are values that you define in a file local to the Data Collector and call from within a pipeline. But with runtime resources, you can restrict the permissions for the files to secure information.">runtime resources</a> or <span class="- topic/ph ph"><a class="- topic/xref xref" href="../Configuration/CredentialStores.html#concept_bt1_bpj_r1b">credential stores</a></span>.</div>
                </div>
            </li></ol></section>
    </div>
</article></article></article></main></div>

                        
                        
                        


                    </div>
                    
                </div>
            </div>


        </div> <nav class="navbar navbar-default wh_footer" data-whc_version="25.0">
  <div class=" footer-container  mx-auto">
    <!-- script for Data Collector, all flavors, but only used when accessed directly, not from portal.
         Google Analytics loader. The immediately invoked function records the global name 'ga' on
         window.GoogleAnalyticsObject, defines window.ga (unless it already exists) as a stub that
         queues its arguments on ga.q, and stores the current time on ga.l. It then creates a script
         element with async set and src pointing at analytics.js, and inserts it before the first
         script element found in the document. The two ga() calls that follow pass a 'create' command
         with the property ID and 'auto', then a 'send' command for a 'pageview'. --><script>
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-60917135-3', 'auto');
  ga('send', 'pageview');
</script>
  </div>
</nav>

        
        <div id="go2top">
            <span class="oxy-icon oxy-icon-up"></span>
        </div>
        
        <!-- The modal container for images -->
        <div id="modal_img_large" class="modal">
            <span class="close oxy-icon oxy-icon-remove"></span>
            <!-- Modal Content (The Image) -->
            <div id="modal_img_container"></div>
            <!-- Modal Caption (Image Text) -->
            <div id="caption"></div>
        </div>
        
        
        © 2023 StreamSets, Inc.

    </body>
</html>